rm(list = ls(all.names = TRUE))
gc(reset=T)
rm(list=ls())

# Packages -------
suppressMessages({ 
  pacman::p_load(bipartite, ggplot2, tidyr, dplyr, magrittr, ineq, reldist, gridGraphics, cowplot, ggrepel, stringr, gridExtra, gtable, ggwordcloud, kableExtra, pander, ggsci, maps, viridis, lsa, reshape2, data.table) })

suppressMessages({ 
  pacman::p_load(parallel, foreach, doParallel, maxnodf, e1071, devtools, EconGeo, econetwork, lpbrim) })


# Read in Full Matrix Data -----------
# NODF_df_20_50 <- read.csv("/Path/Data/COMBINED_OUTPUT_R_NODF_Citations.csv.gz")

# ## Warning: Very Large Data
# Matrix_df_20_50 <- read.csv("/Path/Data/COMBINED_OUTPUT_R_NODF_Matrix_Citations.csv.gz") %>% subset(Citation_Deflated=="Deflated") %>% subset(Citation_Window==5) %>% subset(Citation_Logged=="Log") %>% subset(Sample_Stratified=="Censored20")

# Ideal_Matrix_df_20_50 <- read.csv("/Path/Data/COMBINED_OUTPUT_R_Ideal_NODF_Matrix_Citations.csv.gz") %>% subset(Citation_Deflated=="Deflated") %>% subset(Citation_Window==5) %>% subset(Citation_Logged=="Log") %>% subset(Sample_Stratified=="Censored20")

# save.image("/Path/Input_Data/INPUT_R_Vignette_Matrices_Data.RData")

load("/Path/Input_Data/INPUT_R_Vignette_Matrices_Data.RData")

# Regions | Read in Data ----------
# Regions <- read.csv("/Path/Input_Data/INPUT_R_Nation_to_Region_Core.csv") %>%
#   dplyr::mutate(Region = case_when(Nation=="PUERTO_RICO" ~ "LATIN_AND_SOUTH_AMERICA",
#                                    Nation=="CUBA" ~ "LATIN_AND_SOUTH_AMERICA", Nation=="GREENLAND" ~ "WESTERN_EUROPE",
#                                    Nation=="FRENCH_POLYNESIA" ~ "OCEANIA", Nation=="BRUNEI_DARUSSALAM" ~ "SOUTH_EAST_ASIA",
#                                    Nation=="COMOROS" ~ "SUB_AFRICA", Nation=="CABO_VERDE" ~ "SUB_AFRICA",
#                                    Nation=="KIRIBATI" ~ "OCEANIA", Nation=="LAO_PDR" ~ "SOUTH_EAST_ASIA",
#                                    Nation=="MOLDOVA" ~ "EASTERN_EUROPE_USSR", Nation=="MAURITANIA" ~ "MENA",
#                                    Nation=="NEW_CALEDONIA" ~ "OCEANIA", Nation=="ISRAEL"~"WESTERN_EUROPE",
#                                    Nation=="SAN_MARINO" ~ "WESTERN_EUROPE", Nation=="SAO_TOME_AND_PRINCIPE" ~ "SUB_AFRICA",
#                                    Nation=="TUVALU" ~ "OCEANIA", Nation=="ST_VINCENT_AND_THE_GRENADINES" ~ "CARIBBEAN",
#                                    Nation=="MAURITIUS" ~ "SUB_AFRICA", Nation=="RUSSIA" ~ "EASTERN_EUROPE_USSR",
#                                    Nation=="JAPAN" ~ "Japan, South Korea,\nand Singapore", Nation=="SOUTH_KOREA" ~ "Japan, South Korea,\nand Singapore", Nation=="SINGAPORE" ~ "Japan, South Korea,\nand Singapore",
#                                    Nation=='UNITED_KINGDOM' ~ "UK, Australia\nNZ and Canada", Nation=='CANADA' ~ "UK, Australia\nNZ and Canada", Nation=='AUSTRALIA' ~ "UK, Australia\nNZ and Canada", 
#                                    Nation=='NEW_ZEALAND' ~ "UK, Australia\nNZ and Canada", Nation=='UNITED_STATES' ~ "USA",
#                                    Nation=='CHINA' ~ "China", TRUE ~ as.character(Region))) %>%
#   dplyr::mutate(Region = case_when(Region=="CARIBBEAN" ~ "Latin America\nand Caribbean",
#                                    Region=="WESTERN_EUROPE" ~ "Western Europe",
#                                    Region=="SUB_AFRICA" ~ "MENA and\nSub-Saharan Africa",
#                                    Region=="EASTERN_EUROPE_USSR" ~ "Russia and\nEastern Europe",
#                                    Region=="EAST_ASIA" ~ "Asia and Oceania",
#                                    Region=="CENTRAL_ASIA" ~ "Asia and Oceania",
#                                    Region=="SOUTH_EAST_ASIA" ~ "Asia and Oceania",
#                                    Region=="LATIN_AND_SOUTH_AMERICA" ~ "Latin America\nand Caribbean",
#                                    #Region=="NORTH_AMERICA" ~ "US, UK, and Canada",
#                                    Region=="MENA" ~ "MENA and\nSub-Saharan Africa",
#                                    Region=="OCEANIA" ~ "Asia and Oceania",
#                                    TRUE~as.character(Region)))%>%
#   as.data.frame() %>%
#   dplyr::select("Nation","Region") %>%
#   dplyr::rename(Label=Nation)

## Fixing problematic countries to join regions with data ------
# Regions2 <- Regions %>%
#   mutate(Label = as.character(Label), 
#          Label = case_when(Label == "U_ARAB_EMIRATES" ~ "United Arab Emirates",
#                            Label == "BAHAMAS_THE" ~ "Bahamas",
#                            Label == "COTE_D'IVOIRE" ~ "Ivory Coast",
#                            Label == "CONGO_REP" ~ "Republic of the Congo",
#                            Label == "REP_OF_GEORGIA" ~ "Georgia",
#                            Label == "GAMBIA_THE" ~ "Gambia",
#                            Label == "EQUATORIAL_GUINEA" ~ "Papua New Guinea",
#                            Label == "KYRGYZ_REPUBLIC" ~ "Kyrgyzstan",
#                            Label == "LAO_PDR" ~ "Laos",
#                            Label == "MACAO_SAR_PEOPLES_R_CHINA" ~ "Macao",
#                            Label == "KOREA_DEM_REP" ~ "South Korea",
#                            Label == "SINT_MAARTEN_DUTCH_PART" ~ "Sint Maarten",
#                            Label == "YEMEN_REP" ~ "Yemen",
#                            Label == "CONGO_DEM_REP" ~ "Democratic Republic of the Congo",
#                            TRUE ~ Label)) %>%
#   mutate(Label = str_to_title(str_replace(str_replace(str_replace(str_replace(Label, "_", " "), "_", " "), "_", " "), "_", " ")), Label = str_replace(str_replace(str_replace(Label, "And", "and"), "Of", "of"), "The", "the"))

# Read in Country Edgelist Data ----------------------

# Edgelist_df <- read.csv("/Path/Data/OUTPUT_Python_MAG_Ecology_Citations_GRID_Edgelist.csv.gz")

#countryGRID <- Edgelist_df %>% dplyr::select(COUNTRY, GRID) %>% 
#  unique() %>% 
#  arrange(COUNTRY)

#countryGRIDRegion <- left_join(countryGRID, Regions2, by=c("COUNTRY" = "Label"))

# countryGRIDRegion <- countryGRIDRegion %>% mutate(Region = 
#                                                     case_when(COUNTRY=="Saint Kitts and Nevis" ~ "Latin America\nand Caribbean",
#                                                               COUNTRY=="North Korea" ~ "Asia and Oceania",
#                                                               COUNTRY=="Aland Islands" ~ "Western Europe",
#                                                               COUNTRY=="Jersey" ~ "Western Europe",
#                                                               COUNTRY=="Faroe Islands" ~ "Western Europe",
#                                                               COUNTRY=="Cape Verde" ~ "MENA and\nSub-Saharan Africa",
#                                                               COUNTRY=="Gibraltar" ~ "Western Europe",
#                                                               COUNTRY=="Monaco" ~ "Western Europe",
#                                                               COUNTRY=="Montserrat" ~ "Latin America\nand Caribbean",
#                                                               COUNTRY=="Guadeloupe" ~ "Latin America\nand Caribbean",
#                                                               TRUE ~ as.character(Region)))

# Number of Countries and Universities ---------

#unique(Matrix_df_20_50$Nations) %>% length()

#4794

#data.frame(GRID=unique(Matrix_df_20_50$Nations)) %>% inner_join(., countryGRIDRegion,by=c("GRID")) %>% select(COUNTRY) %>% unique() %>% dim()

#176

#sum((read.csv("/Path/Data/INPUT_Global_Ecology_of_Citations_Number_of_Docs_per_Field.csv.gz") %>% subset(Year>=1990 & Year<=2012))$Number_of_Documents)

# 66231469

# Adding in Country and Regions -------------

Matrix_df_20_50 <- rename(Matrix_df_20_50, GRID=Nations)

#Matrix_df_20_50 <- inner_join(Matrix_df_20_50, countryGRIDRegion, 
#                              by=c("GRID"))

Ideal_Matrix_df_20_50 <- rename(Ideal_Matrix_df_20_50, GRID=Nations)

#Ideal_Matrix_df_20_50 <- inner_join(Ideal_Matrix_df_20_50, 
#                                    countryGRIDRegion, by=c("GRID"))

# N.B. - Uncomment the Discipline You Want Ploted
#Input_Discipline <- "Mathematics"
#Input_Discipline <- "Biology"
Input_Discipline <- "Computer Science"
#Input_Discipline <- "Economics"

Input_Year <- 2010

# Figure 1: Topics Table, Matrix for ALL GRID, Matrix for 3 groups of GRID ----

## Topics table --------
### Adding up countries present in each topic per discipline ------

TMYEAR_sum <- Matrix_df_20_50 %>% 
  subset(Year==Input_Year & Discipline==Input_Discipline) %>% 
  group_by(Topics) %>% 
  arrange(Topics) %>% 
  mutate(Topic_Numb = cur_group_id()) %>% 
  ungroup() %>% 
  group_by(GRID)%>%
  unique()%>% 
  group_by(Topics, Topic_Numb) %>% 
  summarise(Total_Presence=sum(Value)) %>% 
  ungroup()

### finding top, bottom, mean, median -------

TMYEAR_topics <- TMYEAR_sum %>%
  mutate(Mean = mean(Total_Presence), Median = median(Total_Presence),
         Mean_absdiff = abs(Total_Presence - Mean), Median_absdiff = abs(Total_Presence - Median))  %>%
  slice(which.max(Total_Presence), which.min(Total_Presence),
        which.min(Mean_absdiff), which.min(Median_absdiff)) %>%
  mutate(Type = c("Top", "Bottom", "Mean", "Median"),
         # Topics = case_when(Topics=="Mathematical Optimization" ~ "Mathematical\nOptimization", Topics=="Computational Science" ~ "Computational\nScience", TRUE ~ as.character(Topics)),
         `Topics (Topic Number)` = paste(Topics, " (", Topic_Numb, ")", sep = "")) %>%
  dplyr::select(Type, `Topics (Topic Number)`, Total_Presence) %>%
  mutate(Type = factor(Type, levels = c("Top", "Mean", "Median", "Bottom"))) %>%
  arrange(Type) %>% rename(`Total Presence` = Total_Presence) 

### Making the table -------- 

mytheme <- gridExtra::ttheme_minimal(base_size = 10, 
                                     padding = unit(c(10, 6), "mm"), #length, height of rows
                                     core = list(fg_params=list(cex = 1.5, hjust=0, x=0.1,vjust=1, y=0.8)),
                                     colhead = list(fg_params=list(cex = 1.5, hjust=0, x=0.1)))

x <- gridExtra::tableGrob(TMYEAR_topics, theme = mytheme, rows=NULL)

p <- gtable::gtable_add_grob(x, grobs = rectGrob(gp=gpar(fill =NA, lwd=2)), t = 1, b = 5, l = 1, r = ncol(TMYEAR_topics))

p1 <- gtable::gtable_add_grob(p, grobs = rectGrob(gp = gpar(fill = NA, lwd = 2)),
                      t = 1, l = 1, r = ncol(p))

# plot_grid(p1)

### 2023 09 23 - Double Check -----

Matrix_df_GRIDYEAR <- Matrix_df_20_50 %>%
  subset(Year==Input_Year & Discipline==Input_Discipline) %>% 
  ungroup() %>%
  group_by(Topics, GRID) %>%
  summarize(Binary_Presence = Value) %>%
  tidyr::drop_na() %>%
  as.data.frame() %>%
  unique()

### getting to be ordered highest presences first -------

order_GRID <- Matrix_df_GRIDYEAR %>% 
  #dplyr::select(-c(Region, Value, Year)) %>%
  group_by(Topics, GRID) %>%
  unique() %>% ungroup() %>% group_by(GRID,) %>%
  summarise(Total = sum(Binary_Presence)) %>% arrange((Total)) %>%
  unite("top_GRID", GRID, sep = "_", remove = FALSE) %>%
  mutate(top_GRID = factor(top_GRID, levels = top_GRID))

order_GRIDtopics <- Matrix_df_GRIDYEAR %>% 
  group_by(Topics) %>%
  summarise(Col_Total = sum(Binary_Presence)) %>% 
  arrange(desc(Col_Total)) %>%
  unite("top_topic", Topics, sep = "_", remove = FALSE) %>%
  mutate(top_topic = factor(top_topic, levels = top_topic))

# total is how many times the region was present in a topic per discipline (row total)
# col total is how many times the topic was present per discipline

# Data for nested binary matrix ----------

hmpGRID <- Matrix_df_GRIDYEAR %>% 
  group_by(Topics) %>% arrange(Topics) %>% 
  mutate(Topic_Numb = cur_group_id()) %>% ungroup()

bmxnGRID <- hmpGRID %>% left_join(order_GRID, by=c("GRID"))
bmxnGRID <- bmxnGRID %>% left_join(order_GRIDtopics, by=c("Topics"))

bmxnGRID <- bmxnGRID %>% arrange((Total))
bmxnGRID$top_GRID <- factor(bmxnGRID$top_GRID, levels=unique(bmxnGRID$top_GRID))
bmxnGRID <- bmxnGRID %>% arrange(desc(Col_Total))
bmxnGRID$top_topic <- factor(bmxnGRID$top_topic, levels=unique(bmxnGRID$top_topic))

#bringing in GRID names
# GRIDlabels <- read.csv("/Path/Input_Data/INPUT_GRID.csv")
# bmxnGRID <- inner_join(bmxnGRID, GRIDlabels[, c("gridid", "grid")], by=c("GRID"="gridid"))

bmxnGRID <- bmxnGRID%>%  
  mutate(grid_short = as.character(GRID)) %>%
  mutate_at("grid_short", stringr::str_trunc, width = 25, ellipsis = '...')

# plotting all GRID matrix NESTED -------

pgrid <- ggplot(data=bmxnGRID) +
  scale_x_discrete(breaks = bmxnGRID$top_topic,
                   labels = bmxnGRID$Topic_Numb,
                   expand = c(0.001, 0.001)) + 
  scale_y_discrete(breaks = bmxnGRID$top_GRID,
                   labels=bmxnGRID$grid_short, 
                   expand = c(0.00, 0.00)) +
  geom_tile(aes(x=top_topic, y=top_GRID, fill=factor(Binary_Presence))) +
  theme_bw() + scale_fill_manual(values=c("white", "black")) +
  theme(legend.position = "none") + labs(x=NULL, y=NULL) +
  theme(plot.margin = unit(c(0,0.5,0.2,0.5), "cm"),
        axis.text.y=element_blank(), 
        axis.ticks.y=element_blank(),
        #text = element_text(size=10),   
        panel.grid = element_blank()) +
  theme(text = element_text(size = 18),
        axis.text.x = element_text(size=10,angle = 90))+
  geom_vline(xintercept = seq(0.5, length(bmxnGRID$top_topic), by = 1), color="black", size=0.5, alpha=0.1) +
  geom_hline(yintercept = seq(0.5, length(bmxnGRID$top_GRID), by = 10), color="black", size=0.5, alpha=0.05)

#pgrid

## Plotting all GRID matrix UN-NESTED ------------


praw <- ggplot(data=bmxnGRID, aes(x=Topic_Numb, y=grid_short, fill=factor(Binary_Presence))) +
  geom_tile() +
  theme_bw() + scale_fill_manual(values=c("white", "black")) +
  scale_x_continuous(expand = c(0.001, 0.001)) + scale_y_discrete(expand = c(0.00, 0.00)) +
  theme(legend.position = "none") + labs(x=NULL, y=NULL) +
  theme(plot.margin = unit(c(0,0.5,0.2,0.5), "cm")) + #top, right, bottom, left
  theme(axis.text.y=element_blank(), axis.ticks.y=element_blank(),
        text = element_text(size=16),   
        panel.grid = element_blank()) +
  geom_vline(xintercept = seq(0.5, length(unique(bmxnGRID$top_topic)), by = 1), color="black", size=0.5, alpha=0.1) + 
  geom_hline(yintercept = seq(0.5, length(unique(bmxnGRID$grid_short)), by = 10), color="black", size=0.5, alpha=0.05)

# plotting 10 per group matrix ---------
## with GRID labels, subsetting top and bottom --------

topbottomGRID <- rbind(bmxnGRID%>%group_by(Topics)%>%slice_head(n=10)%>%mutate(Type="Bottom10"),
                       bmxnGRID%>%group_by(Topics)%>%slice(349:358)%>%mutate(Type="Middle10"),
                       bmxnGRID%>%group_by(Topics)%>%slice_tail(n=10)%>%mutate(Type="Top10")) %>%
  select(top_topic,Topic_Numb,top_GRID,grid_short,Binary_Presence,Type) %>% unique()

### plotting -------------

myplot <- ggplot(data=topbottomGRID) +
  scale_x_discrete(breaks = topbottomGRID$top_topic,
                   labels = topbottomGRID$Topic_Numb,
                   expand = c(0.001, 0.001)) + 
  scale_y_discrete(breaks = topbottomGRID$top_GRID,
                   labels = topbottomGRID$grid_short, 
                   expand = c(0.00, 0.00)) +
  geom_tile(aes(x=top_topic, 
                y=top_GRID, 
                fill=factor(Binary_Presence)), 
            color="black") +
  theme_bw() + 
  scale_fill_manual(values=c("white", "black")) +
  theme(legend.position = "none") + 
  labs(x=NULL, y=NULL) +
  theme(plot.margin = unit(c(0,0.5,0.2,0.5), "cm")) + 
  geom_hline(yintercept=10.5) + 
  geom_hline(yintercept=20.5)  +
  theme(text = element_text(size = 18),
    axis.text.x = element_text(size=10,angle = 90))

x_cols <- rep(pal_aaas()(length(unique(topbottomGRID$Type))), each=10) #each=10

pgrid10 <- myplot + theme(axis.text.y = element_text(colour=rev(x_cols)))

#library(scales)
#show_col(pal_nejm("default")(8))

rightside <- cowplot::plot_grid(NULL, plot_grid(NULL, p1, NULL, nrow=1, rel_widths = c(0.8,5,-0.5)), pgrid10,
                       labels =c("B", "", "C"), hjust=0, label_x = 0.01, vjust=1.3,
                       ncol=1, rel_heights=c(-0.1, 2, 5.6))

#plot_grid(pgrid, NULL, rightside, ncol=3, rel_widths = c(1.5, -0.03, 2.2), labels=c("A","", ""), vjust=1, hjust=0)

#putting all together 4 figs

rightside <- plot_grid(NULL, plot_grid(NULL, p1, NULL, nrow=1, rel_widths = c(1.1,5,0.5)), pgrid10,
                       ncol=1, rel_heights=c(-0.15, 2, 5.2), labels=c("C", "", "D"), hjust=0, label_x = 0.033)

title <- ggdraw() + 
  draw_label(paste(Input_Discipline,as.character(Input_Year),
                   sep=" "), fontface='bold')

pdf(paste("/Path/FIGURE_Research_Policy_Vignette_",Input_Discipline,"_",as.character(Input_Year),".pdf",sep=""),width=(9*1.5), height=9*1)
plot_grid(title,
          plot_grid(plot_grid(praw, pgrid, ncol=1, labels=c("A", "B"),  hjust=0), NULL, rightside, ncol=3, rel_widths = c(1, -0.03, 1.5)),
          ncol=1, rel_heights=c(0.1, 1)) 
dev.off()

